# Summary: Creates SI F

##############################################
#----------------- SI F ---------------------#
##############################################

options(scipen=999)
rm(list = ls())

# Load libraries
library(tidyverse)
library(sandwich)
library(lmtest)
library(ggeffects)
library(margins)
library(DescTools)
library(insight)
library(msm)
library(lme4)
library(srvyr)
library(survey)
library(xtable)
library(texreg)
library(MASS)

# Set working directory
# setwd('~/Dataverse/')

# Load datasets
load(file = "./Data/df.RData")
load(file = "./Data/df_long.RData")

##############
# TABLE F.11 #
##############

# Linear regression
m1_n_offices = lm(n_offices_considered ~ racegender +
                    political_interest +
                    n_offices_qualified +
                    partyID +
                    encouraged +
                    AGE7 +
                    education +
                    income +
                    married, 
                  data = df)

m1_n_offices_robust = coeftest(m1_n_offices, vcov. = vcovCL(m1_n_offices, type = "HC1"))

# Negative binomial
m1_n_offices_nb = glm.nb(n_offices_considered ~ racegender +
                           political_interest +
                           n_offices_qualified +
                           partyID +
                           encouraged +
                           AGE7 +
                           education +
                           income +
                           married, 
                         data = df)

m1_n_offices_nb_robust = coeftest(m1_n_offices_nb, vcov. = vcovCL(m1_n_offices_nb, type = "HC1"))

texreg(
  list(m1_n_offices_robust, m1_n_offices_nb_robust),
  file = "./SI_F_Results/tableF11.txt"
)


##############
# TABLE F.12 #
##############

m3_n_offices = lm(n_offices_considered ~ racegender +
                           racegender*encouraged +
                           political_interest +
                           n_offices_qualified +
                           partyID +
                           encouraged +
                           AGE7 +
                           education +
                           income +
                           married, 
                         data = df)

m3_n_offices_robust = coeftest(m3_n_offices, vcov. = vcovCL(m3_n_offices, type = "HC1"))

# Negative binomial
m3_n_offices_nb = glm.nb(n_offices_considered ~ racegender +
                                  racegender*encouraged +
                                  political_interest +
                                  n_offices_qualified +
                                  partyID +
                                  encouraged +
                                  AGE7 +
                                  education +
                                  income +
                                  married, 
                                data = df)

m3_n_offices_nb_robust = coeftest(m3_n_offices_nb, vcov. = vcovCL(m3_n_offices_nb, type = "HC1"))

texreg(
  list(m3_n_offices_robust, m3_n_offices_nb_robust),
  file = "./SI_F_Results/tableF12.txt"
)


##############
# TABLE F.13 #
##############

m2_random = glmer(considered_running ~ racegender + office +
                    racegender*office +
                    political_interest +
                    qualified_office +
                    partyID +
                    encouraged +
                    AGE7 +
                    education +
                    income +
                    married +
                    (1 | CaseId), 
                  data = df_long,
                  family = binomial,
                  control = glmerControl(optimizer = "bobyqa"),
                  nAGQ = 0)

texreg(
  m2_random,
  file = "./SI_F_Results/tableF13.txt"
)


##############
# TABLE F.14 #
##############

# School Board
df_long_by_office_school = df_long %>%
  dplyr::filter(office=="School Board")

m_office_school = glm(considered_running ~ racegender +
                        political_interest +
                        qualified_office +
                        partyID +
                        encouraged +
                        AGE7 +
                        education +
                        income +
                        married, 
                      data = df_long_by_office_school,
                      family = binomial)

m_office_school_robust = coeftest(m_office_school, vcov. = vcovHC(m_office_school, type="HC1"))

# City Council
df_long_by_office_city = df_long %>%
  dplyr::filter(office=="City Council")

m_office_city = glm(considered_running ~ racegender +
                      political_interest +
                      qualified_office +
                      partyID +
                      encouraged +
                      AGE7 +
                      education +
                      income +
                      married, 
                    data = df_long_by_office_city,
                    family = binomial)

m_office_city_robust = coeftest(m_office_city, vcov. = vcovHC(m_office_city, type="HC1"))

# Mayor
df_long_by_office_mayor = df_long %>%
  dplyr::filter(office=="Mayor")

m_office_mayor = glm(considered_running ~ racegender +
                       political_interest +
                       qualified_office +
                       partyID +
                       encouraged +
                       AGE7 +
                       education +
                       income +
                       married, 
                     data = df_long_by_office_mayor,
                     family = binomial)

m_office_mayor_robust = coeftest(m_office_mayor, vcov. = vcovHC(m_office_mayor, type="HC1"))

# State Legislature
df_long_by_office_state = df_long %>%
  dplyr::filter(office=="State Legislature")

m_office_state = glm(considered_running ~ racegender +
                       political_interest +
                       qualified_office +
                       partyID +
                       encouraged +
                       AGE7 +
                       education +
                       income +
                       married, 
                     data = df_long_by_office_state,
                     family = binomial)

m_office_state_robust = coeftest(m_office_state, vcov. = vcovHC(m_office_state, type="HC1"))

# Governor
df_long_by_office_governor = df_long %>%
  dplyr::filter(office=="Governor")

m_office_governor = glm(considered_running ~ racegender +
                          political_interest +
                          qualified_office +
                          partyID +
                          encouraged +
                          AGE7 +
                          education +
                          income +
                          married, 
                        data = df_long_by_office_governor,
                        family = binomial)

m_office_governor_robust = coeftest(m_office_governor, vcov. = vcovHC(m_office_governor, type="HC1"))

# US House
df_long_by_office_house = df_long %>%
  dplyr::filter(office=="US House")

m_office_house = glm(considered_running ~ racegender +
                       political_interest +
                       qualified_office +
                       partyID +
                       encouraged +
                       AGE7 +
                       education +
                       income +
                       married, 
                     data = df_long_by_office_house,
                     family = binomial)

m_office_house_robust = coeftest(m_office_house, vcov. = vcovHC(m_office_house, type="HC1"))

# US Senate
df_long_by_office_senate = df_long %>%
  dplyr::filter(office=="US Senate")

m_office_senate = glm(considered_running ~ racegender +
                        political_interest +
                        qualified_office +
                        partyID +
                        encouraged +
                        AGE7 +
                        education +
                        income +
                        married, 
                      data = df_long_by_office_senate,
                      family = binomial)

m_office_senate_robust = coeftest(m_office_senate, vcov. = vcovHC(m_office_senate, type="HC1"))

# President
df_long_by_office_pres = df_long %>%
  dplyr::filter(office=="President")

m_office_pres = glm(considered_running ~ racegender +
                      political_interest +
                      qualified_office +
                      partyID +
                      encouraged +
                      AGE7 +
                      education +
                      income +
                      married, 
                    data = df_long_by_office_pres,
                    family = binomial)

m_office_pres_robust = coeftest(m_office_pres, vcov. = vcovHC(m_office_pres, type="HC1"))

texreg(
  list(m_office_school_robust,m_office_city_robust,m_office_mayor_robust,
       m_office_state_robust,m_office_governor_robust,
       m_office_house_robust,m_office_senate_robust,m_office_pres_robust),
  file = "./SI_F_Results/tableF14.txt"
)


##############
# TABLE F.15 #
##############

df_WEIGHT1 = df %>%
  dplyr::filter(!is.na(considered_running),
                !is.na(racegender),
                !is.na(political_interest),
                !is.na(n_offices_qualified),
                !is.na(partyID),
                !is.na(encouraged),
                !is.na(AGE7),
                !is.na(education),
                !is.na(income),
                !is.na(married))

df_WEIGHT1 = df_WEIGHT1 %>%
  as_survey(weights = c(WEIGHT1))

eq1 = considered_running ~ racegender +
  political_interest +
  n_offices_qualified +
  partyID +
  encouraged +
  AGE7 +
  education +
  income +
  married

m1_WEIGHT1 = svyglm(eq1, family=quasibinomial(link='logit'), design = df_WEIGHT1, na.action = na.omit)

texreg(
  m1_WEIGHT1,
  file = "./SI_F_Results/tableF15.txt"
)


##############
# FIGURE F.1 #
##############

AME_m1_weights = summary(margins(m1_WEIGHT1, design = df_WEIGHT1)) %>% 
  dplyr::filter(grepl("race",factor)) %>% 
  as.data.frame()

AME_m1_weights$factor = gsub("racegender","", AME_m1_weights$factor)

AME_m1_weights = AME_m1_weights %>%
  mutate(racegender = factor(factor, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women")))

ggplot(AME_m1_weights, aes(x=racegender, y=AME)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("\nAME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", linewidth=1) +
  scale_y_continuous(limits = c(-0.23, 0.1)) +
  coord_flip() 

ggsave(file="./SI_F_Results/figureF1.png", width = 25, height = 17, units = "cm")


##############
# TABLE F.16 #
##############

df_long_WEIGHT1 = df_long %>%
  dplyr::filter(!is.na(considered_running),
                !is.na(racegender),
                !is.na(political_interest),
                !is.na(qualified_office),
                !is.na(partyID),
                !is.na(encouraged),
                !is.na(AGE7),
                !is.na(education),
                !is.na(income),
                !is.na(married),
                !is.na(office),
                !is.na(CaseId))

cluster = svydesign(id = ~CaseId, weights = ~WEIGHT1, data = df_long_WEIGHT1)

eq2 = considered_running ~ racegender + office +
  racegender*office +
  political_interest +
  qualified_office +
  partyID +
  encouraged +
  AGE7 +
  education +
  income +
  married

m2_WEIGHT1 = svyglm(eq2, family=quasibinomial(link='logit'), design = cluster, na.action = na.omit)

texreg(
  m2_WEIGHT1,
  file = "./SI_F_Results/tableF16.txt"
)


##############
# FIGURE F.2 #
##############

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

AME_m2_weights = summary(margins(m2_WEIGHT1, design = cluster, 
                                            at = list(office = c("School Board","City Council","Mayor",
                                                                 "State Legislature","Governor",
                                                                 "US House","US Senate","President")))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

AME_m2_weights$factor = gsub("racegender","", AME_m2_weights$factor)

AME_m2_weights = AME_m2_weights %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")))

ggplot(AME_m2_weights, aes(x=office, y=AME, group=racegender, linetype=racegender, color=racegender)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  scale_linetype_manual("Condition", values=c("solid","solid","solid","longdash","longdash")) +
  scale_color_manual("Condition", values=c("black", "gray40", "gray65","red","red4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("AME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", size=0.75)  

ggsave(file="./SI_F_Results/figureF2.png", width = 45, height = 21, units = "cm")


##############
# Table F.17 #
##############

df_WEIGHT1 = df %>%
  dplyr::filter(!is.na(considered_running),
                !is.na(racegender),
                !is.na(political_interest),
                !is.na(n_offices_qualified),
                !is.na(partyID),
                !is.na(encouraged),
                !is.na(AGE7),
                !is.na(education),
                !is.na(income),
                !is.na(married))

df_WEIGHT1 = df_WEIGHT1 %>%
  as_survey(weights = c(WEIGHT1))

eq3 = considered_running ~ racegender + encouraged +
  racegender*encouraged +
  political_interest +
  n_offices_qualified +
  partyID +
  AGE7 +
  education +
  income +
  married

m3_WEIGHT1 = svyglm(eq3, family=quasibinomial(link='logit'), design = df_WEIGHT1, na.action = na.omit)

texreg(
  m3_WEIGHT1,
  file = "./SI_F_Results/tableF17.txt"
)


##############
# FIGURE F.3 #
##############

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

AME_m3_weights = summary(margins(m3_WEIGHT1,design = df_WEIGHT1, 
                                                at = list(encouraged = c("None","Non-Political","Political","Both")))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

AME_m3_weights$factor = gsub("racegender","",AME_m3_weights$factor)

AME_m3_weights = AME_m3_weights %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")))

ggplot(AME_m3_weights, aes(x=encouraged, y=AME, group=racegender, linetype=racegender, color=racegender)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  scale_linetype_manual("Condition", values=c("solid","solid","solid","longdash","longdash")) +
  scale_color_manual("Condition", values=c("black", "gray40", "gray65","red","red4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("AME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", size=0.75)  

ggsave(file="./SI_F_Results/figureF3.png", width = 37, height = 18, units = "cm")


##############
# FIGURE F.4 #
##############

df_no_controls_WEIGHT1 = df %>%
  dplyr::filter(!is.na(racegender),
                !is.na(WEIGHT1),
                !is.na(considered_running),
                !is.na(racegender),
                !is.na(political_interest),
                !is.na(n_offices_qualified),
                !is.na(partyID),
                !is.na(encouraged),
                !is.na(AGE7),
                !is.na(education),
                !is.na(income),
                !is.na(married))

df_no_controls_WEIGHT1 = df_no_controls_WEIGHT1 %>%
  as_survey(weights = c(WEIGHT1))

eq1 = considered_running ~ racegender

m1_wgt_no_control = svyglm(eq1, family=quasibinomial(link='logit'), design = df_no_controls_WEIGHT1, na.action = na.omit)

AME_m1_no_controls = summary(margins(m1_wgt_no_control, design = df_no_controls_WEIGHT1)) 

AME_m1_no_controls$factor = gsub("racegender","",AME_m1_no_controls$factor)

AME_m1_no_controls = AME_m1_no_controls %>%
  mutate(racegender = factor(factor, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women")))

ggplot(AME_m1_no_controls, aes(x=racegender, y=AME)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("\nAME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", size=1) +
  scale_y_continuous(limits = c(-0.3, 0.1)) +
  coord_flip() 

ggsave(file="./SI_F_Results/figureF4.png", width = 25, height = 17, units = "cm")


##############
# FIGURE F.5 #
##############

predict_df = data.frame(racegender = unique(df_long$racegender))
prob = predict(object = m1_wgt_no_control, 
                newdata = predict_df, 
                type = "response")
predict_df$prob = as.data.frame(prob)[,1]
predict_df$prob_se = as.data.frame(prob)[,2]

predict_df$racegender <- with(predict_df, reorder(racegender, prob))

ggplot(predict_df, aes(x=racegender, y=prob)) + 
  geom_pointrange(aes(ymin=prob-(1.96*prob_se), ymax=prob+(1.96*prob_se)), position = position_dodge2(width = 0.5)) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("\nPredicted Probability: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  scale_y_continuous(limits = c(0, 0.4)) +
  coord_flip() 

ggsave(file="./SI_F_Results/figureF5.png", width = 25, height = 17, units = "cm")


##############
# TABLE F.18 #
##############

texreg(
  m1_wgt_no_control,
  file = "./SI_F_Results/tableF18.txt"
)


##############
# FIGURE F.6 #
##############

df_long_no_controls_WEIGHT1 = df_long %>%
  dplyr::filter(!is.na(racegender),
                !is.na(office),
                !is.na(CaseId),
                !is.na(considered_running),
                !is.na(racegender),
                !is.na(political_interest),
                !is.na(qualified_office),
                !is.na(partyID),
                !is.na(encouraged),
                !is.na(AGE7),
                !is.na(education),
                !is.na(income),
                !is.na(married))

cluster = svydesign(id = ~CaseId, weights = ~WEIGHT1, data = df_long_no_controls_WEIGHT1)

eq2 = considered_running ~ racegender + office +
  racegender*office

m2_wgt_no_control = svyglm(eq2, family=quasibinomial(link='logit'), design = cluster, na.action = na.omit)

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

AME_m2_no_controls = summary(margins(m2_wgt_no_control, design = cluster, 
                                                        at = list(office = c("School Board","City Council","Mayor",
                                                                             "State Legislature","Governor",
                                                                             "US House","US Senate","President")))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

AME_m2_no_controls$factor = gsub("racegender","",AME_m2_no_controls$factor)

AME_m2_no_controls = AME_m2_no_controls %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")))

ggplot(AME_m2_no_controls, aes(x=office, y=AME, group=racegender, linetype=racegender, color=racegender)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  scale_linetype_manual("Condition", values=c("solid","solid","solid","longdash","longdash")) +
  scale_color_manual("Condition", values=c("black", "gray40", "gray65","red","red4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("AME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", size=0.75)  

ggsave("./SI_F_Results/figureF6.png", width = 45, height = 21, units = "cm")


##############
# FIGURE F.7 #
##############

predict_df = expand_grid(racegender = unique(df_long$racegender), office = unique(df_long_no_controls_WEIGHT1$office))

prob <- predict(object = m2_wgt_no_control, 
                newdata = predict_df, 
                type = "response")
predict_df$prob = as.data.frame(prob)[,1]
predict_df$prob_se = as.data.frame(prob)[,2]

predict_df = predict_df %>%
  mutate(racegender = factor(racegender, levels = c("White Men","White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")),
         office = factor(office, levels = c("School Board","City Council","Mayor","State Legislature",
                                            "Governor","US House","US Senate","President")))

ggplot(predict_df, aes(x=office, y=prob, group=racegender, linetype=racegender, color=racegender)) + 
  geom_pointrange(aes(ymin=prob-(1.96*prob_se), ymax=prob+(1.96*prob_se)), position = position_dodge2(width = 0.5)) +
  scale_linetype_manual("Condition", values=c("longdash","solid","solid","solid","longdash","longdash")) +
  scale_color_manual("Condition", values=c("blue","black", "gray40", "gray65","red","red4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  guides(linetype = guide_legend(nrow = 1), color = guide_legend(nrow = 1)) +
  xlab("") + ylab("Predicted Probabilities: Gender and Race\n") +
  theme(text = element_text(size=25))  

ggsave("./SI_F_Results/figureF7.png", width = 45, height = 21, units = "cm")


##############
# TABLE F.19 #
##############

texreg(
  m2_wgt_no_control,
  file = "./SI_F_Results/tableF19.txt"
)


##############
# FIGURE F.8 #
##############

# Education

m1_college = glm(considered_running ~ racegender +
                   political_interest +
                   n_offices_qualified +
                   partyID +
                   encouraged +
                   AGE7 +
                   income +
                   married, 
                 data = df[df$education>3,],
                 family = binomial)

m1_college_robust = coeftest(m1_college, vcov. = vcovHC(m1_college, type="HC1"))

# AME:

AME_college = summary(margins(m1_college, vcov = sandwich::vcovHC(m1_college, type = "HC1"))) %>% 
  dplyr::filter(grepl("race",factor)) %>% 
  as.data.frame()

AME_college$factor = gsub("racegender","",AME_college$factor)

AME_college = AME_college %>%
  mutate(racegender = factor(factor, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women")),
         sample = "College Degree")

# Interest in Politics

m1_interested_politics = glm(considered_running ~ racegender +
                               n_offices_qualified +
                               partyID +
                               encouraged +
                               AGE7 +
                               education +
                               income +
                               married, 
                             data = df[df$political_interest>quantile(df$political_interest, .5),],
                             family = binomial)

m1_interested_politics_robust = coeftest(m1_interested_politics, vcov. = vcovHC(m1_interested_politics, type="HC1"))

# AME:

AME_interested_politics = summary(margins(m1_interested_politics, vcov = sandwich::vcovHC(m1_interested_politics, type = "HC1"))) %>% 
  dplyr::filter(grepl("race",factor)) %>% 
  as.data.frame()

AME_interested_politics$factor = gsub("racegender","",AME_interested_politics$factor)

AME_interested_politics = AME_interested_politics %>%
  mutate(racegender = factor(factor, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women")),
         sample = "Interested in Politics")

# Qualified for Office

m1_qualified = glm(considered_running ~ racegender +
                     political_interest +
                     partyID +
                     encouraged +
                     AGE7 +
                     education +
                     income +
                     married, 
                   data = df[df$n_offices_qualified>quantile(df$n_offices_qualified, .5),], 
                   family = binomial)

m1_qualified_robust = coeftest(m1_qualified, vcov. = vcovHC(m1_qualified, type="HC1"))

# AME:

AME_qualified = summary(margins(m1_qualified, vcov = sandwich::vcovHC(m1_qualified, type = "HC1"))) %>% 
  dplyr::filter(grepl("race",factor)) %>% 
  as.data.frame()

AME_qualified$factor = gsub("racegender","",AME_qualified$factor)

AME_qualified = AME_qualified %>%
  mutate(racegender = factor(factor, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women")),
         sample = "Qualified for Office")

# Plot

results_sample = rbind.data.frame(AME_college,AME_interested_politics,AME_qualified)

ggplot(results_sample, aes(x=racegender, y=AME)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("\nAME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", size=1) +
  scale_y_continuous(limits = c(-0.36, 0.12)) +
  coord_flip() +
  facet_wrap(~sample)

ggsave("./SI_F_Results/figureF8.png", width = 37, height = 18, units = "cm")


##############
# TABLE F.20 #
##############

texreg(
  list(m1_college_robust, m1_interested_politics_robust, m1_qualified_robust),
  file = "./SI_F_Results/tableF20.txt"
)


##############
# FIGURE F.9 #
##############

# College Educated

m2_college = glm(considered_running ~ racegender + office +
                   racegender*office +
                   political_interest +
                   qualified_office +
                   partyID +
                   encouraged +
                   AGE7 +
                   income +
                   married, 
                 data = df_long[df_long$education>3,],
                 family = binomial)

m2_college_robust = coeftest(m2_college, vcov. = vcovCL(m2_college, vcov = vcovCL, cluster = ~CaseId))

# AME

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

AME_df_racegender_college = summary(margins(m2_college, 
                                            at = list(office = c("School Board","City Council","Mayor",
                                                                 "State Legislature","Governor",
                                                                 "US House","US Senate","President")), vcov = sandwich::vcovCL(m2_college, vcov = vcovCL, cluster = ~CaseId))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

AME_df_racegender_college$factor = gsub("racegender","",AME_df_racegender_college$factor)

AME_df_racegender_college = AME_df_racegender_college %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")),
         sample = "College Degree")

# Interest in Politics

m2_interested = glm(considered_running ~ racegender + office +
                      racegender*office +
                      qualified_office +
                      partyID +
                      encouraged +
                      AGE7 +
                      education +
                      income +
                      married, 
                    data = df_long[df_long$political_interest>quantile(df_long$political_interest,.5),],
                    family = binomial)

m2_interested_robust = coeftest(m2_interested, vcov. = vcovCL(m2_interested, vcov = vcovCL, cluster = ~CaseId))

# AME

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

AME_df_racegender_interested = summary(margins(m2_interested, 
                                               at = list(office = c("School Board","City Council","Mayor",
                                                                    "State Legislature","Governor",
                                                                    "US House","US Senate","President")), vcov = sandwich::vcovCL(m2_interested, vcov = vcovCL, cluster = ~CaseId))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

AME_df_racegender_interested$factor = gsub("racegender","",AME_df_racegender_interested$factor)

AME_df_racegender_interested = AME_df_racegender_interested %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")),
         sample = "Interested in Politics")

# Qualified for Office

m2_qualified = glm(considered_running ~ racegender + office +
                     racegender*office +
                     political_interest +
                     partyID +
                     encouraged +
                     AGE7 +
                     education +
                     income +
                     married, 
                   data = df_long[df_long$qualified_office>quantile(df_long$qualified_office,.5),],
                   family = binomial)

m2_qualified_robust = coeftest(m2_qualified, vcov. = vcovCL(m2_qualified, vcov = vcovCL, cluster = ~CaseId))

# AME

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

AME_df_racegender_qualified = summary(margins(m2_qualified, 
                                              at = list(office = c("School Board","City Council","Mayor",
                                                                   "State Legislature","Governor",
                                                                   "US House","US Senate","President")), vcov = sandwich::vcovCL(m2_qualified, vcov = vcovCL, cluster = ~CaseId))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

AME_df_racegender_qualified$factor = gsub("racegender","",AME_df_racegender_qualified$factor)

AME_df_racegender_qualified = AME_df_racegender_qualified %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")),
         sample = "Qualified for Office")

# Plot
results_sample_m2 = rbind.data.frame(AME_df_racegender_college,AME_df_racegender_interested,AME_df_racegender_qualified)

ggplot(results_sample_m2, aes(x=office, y=AME, group=racegender, linetype=racegender, color=racegender)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  scale_linetype_manual("Condition", values=c("solid","solid","solid","longdash","longdash")) +
  scale_color_manual("Condition", values=c("black", "gray40", "gray65","red","red4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("AME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", size=0.75) +
  facet_wrap(~sample, ncol=1)

ggsave("./SI_F_Results/figureF9.png", width = 42, height = 26, units = "cm")


##############
# TABLE F.21 #
##############

texreg(
  list(m2_college_robust, m2_interested_robust, m2_qualified_robust),
  file = "./SI_F_Results/tableF21.txt"
)


###############
# FIGURE F.10 #
###############

# Interested in Politics

m3_interested_politics_enc = glm(considered_running ~ racegender + encouraged +
                                   racegender*encouraged +
                                   n_offices_qualified +
                                   partyID +
                                   AGE7 +
                                   education +
                                   income +
                                   married, 
                                 data = df[df$political_interest>quantile(df$political_interest,.5),],
                                 family = binomial)

m3_interested_politics_enc_robust = coeftest(m3_interested_politics_enc, vcov. = vcovHC(m3_interested_politics_enc, type="HC1"))

# AME

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

AME_df_racegender_enc_interested_politics = summary(margins(m3_interested_politics_enc, 
                                                            at = list(encouraged = c("None","Non-Political","Political","Both")), vcov = sandwich::vcovHC(m3_interested_politics_enc, type="HC1"))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

AME_df_racegender_enc_interested_politics$factor = gsub("racegender","",AME_df_racegender_enc_interested_politics$factor)

AME_df_racegender_enc_interested_politics = AME_df_racegender_enc_interested_politics %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")),
         sample = "Interested in Politics")

# Qualified for Office

m3_qualified_enc = glm(considered_running ~ racegender + encouraged +
                         racegender*encouraged +
                         political_interest +
                         partyID +
                         AGE7 +
                         education +
                         income +
                         married, 
                       data = df[df$n_offices_qualified>quantile(df$n_offices_qualified,.5),],
                       family = binomial)

# get results with clustered standard errors (of type HC1)
m3_qualified_enc_robust = coeftest(m3_qualified_enc, vcov. = vcovHC(m3_qualified_enc, type="HC1"))

# AME

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

AME_df_racegender_enc_qualified = summary(margins(m3_qualified_enc, 
                                                  at = list(encouraged = c("None","Non-Political","Political","Both")), vcov = sandwich::vcovHC(m3_qualified_enc, type="HC1"))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

AME_df_racegender_enc_qualified$factor = gsub("racegender","",AME_df_racegender_enc_qualified$factor)

AME_df_racegender_enc_qualified = AME_df_racegender_enc_qualified %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")),
         sample = "Qualified for Office")

# Plot
results_sample_m3 = rbind.data.frame(AME_df_racegender_enc_interested_politics,AME_df_racegender_enc_qualified)

ggplot(results_sample_m3, aes(x=encouraged, y=AME, group=racegender, linetype=racegender, color=racegender)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  scale_linetype_manual("Condition", values=c("solid","solid","solid","longdash","longdash")) +
  scale_color_manual("Condition", values=c("black", "gray40", "gray65","red","red4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("AME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", size=0.75) +
  facet_wrap(~sample, ncol=1)

ggsave("./SI_F_Results/figureF10.png", width = 38, height = 18, units = "cm")


##############
# TABLE F.22 #
##############

texreg(
  list(m3_interested_politics_enc_robust, m3_qualified_enc_robust),
  file = "./SI_F_Results/tableF22.txt"
)


###############
# FIGURE F.11 #
###############

# Subset to Hispanics who completed the interview in Spanish

df_sub = df %>%
  dplyr::filter(!(grepl("Hispanic", racegender) & spanish_interview==0))

m1_del_hisp_engl = glm(considered_running ~ racegender +
                         political_interest +
                         n_offices_qualified +
                         partyID +
                         encouraged +
                         AGE7 +
                         education +
                         income +
                         married, 
                       data = df_sub,
                       family = binomial)

m1_del_hisp_engl_robust = coeftest(m1_del_hisp_engl, vcov. = vcovHC(m1_del_hisp_engl, type="HC1"))

# AME:
AME_del_hisp_engl = summary(margins(m1_del_hisp_engl, vcov = sandwich::vcovHC(m1_del_hisp_engl, type = "HC1"))) %>% 
  dplyr::filter(grepl("race",factor)) %>% 
  as.data.frame()

AME_del_hisp_engl$factor = gsub("racegender","",AME_del_hisp_engl$factor)

AME_del_hisp_engl = AME_del_hisp_engl %>%
  mutate(racegender = factor(factor, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women")),
         sample = "Interview in Spanish")


# Restricted to Hispanics who speak Spanish well

df_sub = df %>%
  dplyr::filter(!(grepl("Hispanic", racegender) & speak_spanish>2))

m1_del_hisp_not_fluent_spanish = glm(considered_running ~ racegender +
                                       political_interest +
                                       n_offices_qualified +
                                       partyID +
                                       encouraged +
                                       AGE7 +
                                       education +
                                       income +
                                       married, 
                                     data = df_sub,
                                     family = binomial)

m1_del_hisp_not_fluent_spanish_robust = coeftest(m1_del_hisp_not_fluent_spanish, vcov. = vcovHC(m1_del_hisp_not_fluent_spanish, type="HC1"))

# AME:
AME_del_hisp_not_fluent_spanish = summary(margins(m1_del_hisp_not_fluent_spanish, vcov = sandwich::vcovHC(m1_del_hisp_not_fluent_spanish, type = "HC1"))) %>% 
  dplyr::filter(grepl("race",factor)) %>% 
  as.data.frame()

AME_del_hisp_not_fluent_spanish$factor = gsub("racegender","",AME_del_hisp_not_fluent_spanish$factor)

AME_del_hisp_not_fluent_spanish = AME_del_hisp_not_fluent_spanish %>%
  mutate(racegender = factor(factor, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women")),
         sample = "Fluent in Spanish")

# Restricted to Hispanics with at least one foreign-born parent

df_sub = df %>%
  dplyr::filter(!(grepl("Hispanic", racegender) & parents_born_us==1)) 

m1_del_hisp_parents_born_us = glm(considered_running ~ racegender +
                                    political_interest +
                                    n_offices_qualified +
                                    partyID +
                                    encouraged +
                                    AGE7 +
                                    education +
                                    income +
                                    married, 
                                  data = df_sub,
                                  family = binomial)

m1_del_hisp_parents_born_us_robust = coeftest(m1_del_hisp_parents_born_us, vcov. = vcovHC(m1_del_hisp_parents_born_us, type="HC1"))

# AME:
AME_del_hisp_parents_born_us = summary(margins(m1_del_hisp_parents_born_us, vcov = sandwich::vcovHC(m1_del_hisp_parents_born_us, type = "HC1"))) %>% 
  dplyr::filter(grepl("race",factor)) %>% 
  as.data.frame()

AME_del_hisp_parents_born_us$factor = gsub("racegender","",AME_del_hisp_parents_born_us$factor)

AME_del_hisp_parents_born_us = AME_del_hisp_parents_born_us %>%
  mutate(racegender = factor(factor, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women")),
         sample = "Parent(s) born Abroad")

# Plot

hips_plot = rbind.data.frame(AME_del_hisp_parents_born_us, AME_del_hisp_not_fluent_spanish,AME_del_hisp_engl)

ggplot(hips_plot, aes(x=racegender, y=AME)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("\nAME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", size=1) +
  scale_y_continuous(limits = c(-0.29, 0.15)) +
  coord_flip() +
  facet_wrap(~sample)

ggsave("./SI_F_Results/figureF11.png", width = 37, height = 18, units = "cm")


##############
# TABLE F.23 #
##############

texreg(
  list(m1_del_hisp_not_fluent_spanish_robust,m1_del_hisp_engl_robust,m1_del_hisp_parents_born_us_robust),
  file = "./SI_F_Results/tableF23.txt"
)


###############
# FIGURE F.12 #
###############

m1_int_party = glm(considered_running ~ racegender +
                     racegender*partyID +
                     political_interest +
                     n_offices_qualified +
                     partyID +
                     encouraged +
                     AGE7 +
                     education +
                     income +
                     married, 
                   data = df,
                   family = binomial)

m1_int_party_robust = coeftest(m1_int_party, vcov. = vcovHC(m1_int_party, type="HC1"))

AME_racegender_by_party = summary(margins(m1_int_party, 
                                           at = list(partyID = c("Republican","Democrat","Independent")))) %>% 
  dplyr::filter(grepl("racegender", factor)) %>% 
  as.data.frame()

AME_racegender_by_party$factor = gsub("racegender","",AME_racegender_by_party$factor)

AME_racegender_by_party = AME_racegender_by_party %>%
  mutate(racegender = factor(factor, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women")),
         partyID = fct_relevel(partyID, c("Democrat","Independent","Republican")))

ggplot(AME_racegender_by_party, aes(x=racegender, y=AME)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("\nAME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", size=1) +
  scale_y_continuous(limits = c(-0.33, 0.1)) +
  coord_flip() +
  facet_wrap(~partyID)

ggsave("./SI_F_Results/figureF12.png", width = 38, height = 18, units = "cm")


##############
# TABLE F.24 #
##############

texreg(
  m1_int_party_robust,
  file = "./SI_F_Results/tableF24.txt"
)


###############
# FIGURE F.13 #
###############

m2 = glm(considered_running ~ racegender + office +
           racegender*office +
           political_interest +
           qualified_office +
           partyID +
           encouraged +
           AGE7 +
           education +
           income +
           married, 
         data = df_long,
         family = binomial)

m2_qualif_dv = glm(qualified_office ~ racegender + office +
                     racegender*office +
                     political_interest +
                     partyID +
                     encouraged +
                     AGE7 +
                     education +
                     income +
                     married, 
                   data = df_long[!is.na(df_long$considered_running),],
                   family = binomial)

m2_qualif_dv_robust = coeftest(m2_qualif_dv, vcov. = vcovCL(m2_qualif_dv, vcov = vcovCL, cluster = ~CaseId))

m2_data = m2[["model"]]

data_predictions = expand_grid(qualified_office = c(0:1), racegender = as.character(unique(m2_data$racegender)), office = unique(m2_data$office)) %>%
  mutate(political_interest = mean(m2_data$political_interest),
         encouraged = Mode(m2_data$encouraged),
         partyID = Mode(m2_data$partyID),
         AGE7 = mean(m2_data$AGE7),
         education = mean(m2_data$education),
         income = mean(m2_data$income),
         married = Mode(m2_data$married),
         considered_running = 1)

n_pred = nrow(data_predictions)

data_predictions = rbind.data.frame(data_predictions, m2_data[(n_pred+1):nrow(m2_data),])

predictions = as.data.frame(get_predicted(m2, newdata = data_predictions, vcov =  vcovCL(m2,  cluster = ~CaseId),
                                          ci = 0.95))

data_predictions = data_predictions[1:n_pred,] %>%
  mutate(predicted = predictions$Predicted[1:n_pred],
         ci_low = predictions$CI_low[1:n_pred],
         ci_high = predictions$CI_high[1:n_pred])

data_predictions = data_predictions %>%
  ungroup() %>%
  dplyr::filter( (qualified_office==1 & racegender!="White Men") | (qualified_office==0 & racegender=="White Men")) %>%
  mutate(color = ifelse(racegender=="White Men", "Unqualified", "Qualified"),
         racegender = factor(racegender, levels = c("Black Men","Hispanic Men","Black Women","Hispanic Women","White Women","White Men")),
         office = factor(office, levels = c("School Board","City Council","Mayor",
                                            "State Legislature","Governor",
                                            "US House","US Senate","President")) )

ggplot(data_predictions, aes(x=racegender, y=predicted, colour = color)) + 
  geom_pointrange(aes(ymin=ci_low, ymax=ci_high), position = position_dodge2(width = 0.5)) +
  scale_color_manual("Condition", values=c("black", "red4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("\nAME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  coord_flip() +
  facet_wrap(~office)

ggsave("./SI_F_Results/figureF13.png", width = 40, height = 25, units = "cm")


###############
# FIGURE F.14 #
###############

racegender_ops = paste0("racegender",  c("White Women","Hispanic Men","Hispanic Women","Black Men","Black Women"))  

AME_df_racegender_qualif = summary(margins(m2_qualif_dv, 
                                           at = list(office = c("School Board","City Council","Mayor",
                                                                "State Legislature","Governor",
                                                                "US House","US Senate","President")), vcov = sandwich::vcovCL(m2_qualif_dv, vcov = vcovCL, cluster = ~CaseId))) %>% 
  dplyr::filter(factor %in% racegender_ops) %>% 
  as.data.frame()

AME_df_racegender_qualif$factor = gsub("racegender","",AME_df_racegender_qualif$factor)

AME_df_racegender_qualif = AME_df_racegender_qualif %>%
  mutate(racegender = factor(factor, levels = c("White Women","Hispanic Women","Black Women","Hispanic Men","Black Men")))

ggplot(AME_df_racegender_qualif, aes(x=office, y=AME, group=racegender, linetype=racegender, color=racegender)) + 
  geom_pointrange(aes(ymin=lower, ymax=upper), position = position_dodge2(width = 0.5)) +
  scale_linetype_manual("Condition", values=c("solid","solid","solid","longdash","longdash")) +
  scale_color_manual("Condition", values=c("black", "gray40", "gray65","red","red4")) +
  theme_bw() +
  theme(legend.position="bottom") +
  theme(legend.title=element_blank()) +
  xlab("") + ylab("AME: Gender and Race\n") +
  theme(text = element_text(size=25))  +
  geom_hline(yintercept = 0, linetype="dotted",
             color = "red", size=0.75)  

ggsave("./SI_F_Results/figureF14.png", width = 45, height = 21, units = "cm")


##############
# TABLE F.25 #
##############

# Table
texreg(m2_qualif_dv_robust)

texreg(
  m1_int_party_robust,
  file = "./SI_F_Results/tableF25.txt"
)

